Download the Data

In [ ]:
import requests

# CSV export of dataset kf7e-cur8 on the City of Chicago data portal.
file_url = 'https://data.cityofchicago.org/api/views/kf7e-cur8/rows.csv?accessType=DOWNLOAD'

# Stream the body to disk so the whole file never has to fit in memory.
# The context manager releases the connection even if the download fails,
# and the timeout prevents the cell from hanging forever on a stalled server.
with requests.get(file_url, stream=True, timeout=60) as r:
    # Fail loudly on an HTTP error instead of saving the error page as chicago.csv.
    r.raise_for_status()
    with open("chicago.csv", "wb") as file:
        # 1 MiB chunks: far fewer Python-level iterations than 1 KiB chunks.
        for block in r.iter_content(chunk_size=1024 * 1024):
            if block:
                file.write(block)

Importations

In [166]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  #advanced visualization library
import warnings
warnings.filterwarnings('ignore')

Fast EDA

In [167]:
import pandas as pd

# Read the CSV written by the download cell into the working DataFrame.
csv_path = 'chicago.csv'
data = pd.read_csv(csv_path)

Data description

  • TIME : Timestamp of the record

  • REGION_ID : Unique arbitrary number to represent each region

  • SPEED : Estimated congestion level. Although expressed in miles per hour, this value is more a reflection of the congestion level in the region than it is indicative of the average raw speed vehicles are travelling within the region.

  • REGION : Name of the region.

  • BUS_COUNT : The number of buses used to estimate traffic.

  • NUM_READS : Number of GPS probes received(or used) for estimating the speed for that segment.

  • WEST: Approximate longitude of the west edge of the region.

  • EAST: Approximate longitude of the east edge of the region.

  • SOUTH : Approximate latitude of the south edge of the region.

  • NORTH : Approximate latitude of the north edge of the region.

  • NW_LOCATION : The location corresponding to the intersection of NORTH and WEST in a format that allows for creation of maps and other geographic operations on this data portal.

  • SE_LOCATION : The location corresponding to the intersection of SOUTH and EAST in a format that allows for creation of maps and other geographic operations on this data portal.

In [168]:
# Convert the raw TIME strings (month/day/year, 12-hour clock with AM/PM) to datetimes.
raw_time_format = "%m/%d/%Y %I:%M:%S %p"
data["TIME"] = pd.to_datetime(data["TIME"], format=raw_time_format)
In [169]:
# Discard records whose SPEED is exactly 0.
zero_speed = data["SPEED"].isin([0])
data = data.loc[~zero_speed]

# Discard records from 21:00 through 06:59, keeping hours 7..20 only.
excluded_hours = [21, 22, 23, 0, 1, 2, 3, 4, 5, 6]
in_excluded_hours = data["HOUR"].isin(excluded_hours)
data = data.loc[~in_excluded_hours]
In [171]:
# Split the parsed timestamp into calendar components used for grouping and filtering.
timestamp_parts = data['TIME'].dt
for part_column, part_values in (
    ('DAY', timestamp_parts.day),
    ('MONTH', timestamp_parts.month),
    ('YEAR', timestamp_parts.year),
):
    data[part_column] = part_values

Build a list of the region names (needed later for the maps)

In [172]:
# Name of each region, stored at position REGION_ID - 1 (region IDs 1 through 29).
list_REGION = [
    data.loc[data['REGION_ID'] == region_id, 'REGION'].unique()[0]
    for region_id in range(1, 30)
]
In [173]:
# Collapse the records to one row per region and hour by averaging the measurements.
# The region bounds and DAY_OF_WEEK are part of the key so they survive the aggregation.
group_keys = ['REGION_ID','MONTH','DAY','YEAR','HOUR','NORTH','WEST','EAST', 'SOUTH','DAY_OF_WEEK']
measure_columns = ['SPEED','BUS_COUNT','NUM_READS']
data = data.groupby(group_keys)[measure_columns].mean().reset_index()
In [174]:
# SPEED scaled by 1.609 (miles-to-kilometres factor as used by the author).
miles_to_km = 1.609
data["SPEEDKM"] = data["SPEED"] * miles_to_km
In [175]:
# Rows are hourly after the aggregation, so the minute part is fixed to '00'.
data['MINUTE'] = '00'

# Join the date parts into 'YYYY-M-D-H-00' text and parse it back into a timestamp.
stamp_parts = data[['YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE']].astype(str)
stamp_text = stamp_parts.agg('-'.join, axis=1)
data['Time'] = pd.to_datetime(stamp_text, format='%Y-%m-%d-%H-%M')
In [176]:
# Centre of each region's bounding box (midpoint of the north/south and east/west edges).
data['CENTER_LAT']=data['NORTH']*0.5+0.5*data['SOUTH']
data['CENTER_LON']=data['EAST']*0.5+0.5*data['WEST']

# Replace 'Time' with a human-readable label such as 'Tue, 01 Jan, 2019 at  7:00 AM'.
# The timestamp is rebuilt from the date-part columns rather than read from
# data['Time'], so this cell can be re-run after 'Time' has become a string.
hourly_stamp = pd.to_datetime(
    data[['YEAR', 'MONTH', 'DAY', 'HOUR']].rename(columns=str.lower)
)
# '%l' (space-padded 12-hour clock) is a platform-specific strftime directive and
# is rejected on some platforms, so the hour field is produced by hand.
hour_12 = ((hourly_stamp.dt.hour + 11) % 12 + 1).astype(str).str.rjust(2)
data['Time'] = (
    hourly_stamp.dt.strftime("%a, %d %b, %Y at ")
    + hour_12
    + hourly_stamp.dt.strftime(":%M %p")
)
data.head()
Out[176]:
REGION_ID MONTH DAY YEAR HOUR NORTH WEST EAST SOUTH DAY_OF_WEEK SPEED BUS_COUNT NUM_READS SPEEDKM MINUTE Time CENTER_LAT CENTER_LON
0 1 1 1 2019 7 42.026444 -87.709645 -87.654561 41.997946 3 25.556667 13.166667 321.833333 41.120677 00 Tue, 01 Jan, 2019 at 7:00 AM 42.012195 -87.682103
1 1 1 1 2019 8 42.026444 -87.709645 -87.654561 41.997946 3 25.183333 15.333333 346.666667 40.519983 00 Tue, 01 Jan, 2019 at 8:00 AM 42.012195 -87.682103
2 1 1 1 2019 9 42.026444 -87.709645 -87.654561 41.997946 3 24.568333 16.666667 372.833333 39.530448 00 Tue, 01 Jan, 2019 at 9:00 AM 42.012195 -87.682103
3 1 1 1 2019 10 42.026444 -87.709645 -87.654561 41.997946 3 23.805000 16.333333 370.500000 38.302245 00 Tue, 01 Jan, 2019 at 10:00 AM 42.012195 -87.682103
4 1 1 1 2019 11 42.026444 -87.709645 -87.654561 41.997946 3 23.521667 19.500000 402.166667 37.846362 00 Tue, 01 Jan, 2019 at 11:00 AM 42.012195 -87.682103

first steps with Folium

some things to know about Folium

  • Maps are defined as a folium.Map object, and we can add other folium objects on top of the folium.Map to improve/add to the map rendered
  • Folium allows the users to select different map projections. We will be using the Spherical Mercator projection in this article since we are visualizing a relatively small area size, and it is a more commonly used projection.
  • We can use different map tiles for the map rendered by Folium, such as from OpenStreetMap (The one I will be using in this tutorial), MapBox (Using tiles from MapBox require you to put the API Key that you have for MapBox licence as one of the arguments), and several other tiles that you can see from this github repo folder or this documentation page
In [177]:
import folium 
from folium import plugins
from folium.plugins import HeatMapWithTime
In [178]:
def generateBaseMap(default_location=(40.693943, -73.985880), default_zoom_start=10):
    """Create an empty folium map that other layers can be added to.

    Parameters
    ----------
    default_location : sequence of two floats
        (latitude, longitude) the map is centred on. The default is an immutable
        tuple so it cannot be altered between calls.
        NOTE(review): the default differs from the centre passed by every call in
        this notebook ([41.881832, -87.623177]); confirm whether it is intended.
    default_zoom_start : int
        Initial zoom level handed to ``folium.Map``.

    Returns
    -------
    folium.Map
        A new map with the scale control enabled.
    """
    base_map = folium.Map(
        location=list(default_location),
        control_scale=True,
        zoom_start=default_zoom_start,
    )
    return base_map
In [179]:
# NOTE(review): this name shadows the builtin `map` and no later cell in the
# notebook reads it; the maps below are built in `base_map` instead.
map = generateBaseMap(default_location=[41.881832, -87.623177])
In [180]:
# Reciprocal of SPEED: lower speeds give larger values. The speed heat maps below
# use this column as the third value of every point.
data['speed'] = data['SPEED'].rdiv(1)

HeatMapWithTime

We can also animate our heat maps to change the data being shown on it based on certain dimensions (hour,month) using class method called HeatMapWithTime.

The SPEED in the 29 regions for a whole week (Monday -> Sunday).

I chose the week of the 8th to the 14th of April 2019

In [182]:
# Week shown in the SPEED animation: Monday 8 April to Sunday 14 April 2019, as
# stated in the text above. The previous filter (MONTH == 3, days 17-23) selected
# a Sunday-to-Saturday week in March instead.
in_week = (data['YEAR'] == 2019) & (data['MONTH'] == 4) & data['DAY'].between(8, 14)
sub_set = data[in_week].copy()
# Defensive: an infinite reciprocal speed would break the heat map weights.
sub_set = sub_set.replace([np.inf, -np.inf], 0)

# One frame per timestamp label, in order of first appearance; every frame is a
# list of [CENTER_LAT, CENTER_LON, speed] points.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in sub_set.Time.unique()
]
    

List of the coordinates of the 29 regions

In [183]:
# [CENTER_LAT, CENTER_LON] of every region, stored at position REGION_ID - 1.
# The centres are read from `data` rather than from the first heat-map frame in
# `l`: `l` is reassigned by later cells, and a frame only lists the regions that
# reported at that hour, so positions could stop matching list_REGION.
region_centers = (
    data.drop_duplicates(subset='REGION_ID')
        .set_index('REGION_ID')[['CENTER_LAT', 'CENTER_LON']]
)
l1 = [region_centers.loc[region_id].tolist() for region_id in range(1, 30)]
   
    
In [184]:
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per timestamp label, labels in order of first appearance.
frame_labels = sub_set.Time.unique().tolist()
heat_gradient = {0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient=heat_gradient,
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group for the per-region marker layers.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
regions_group.add_to(base_map)

# One sub-layer per region, holding a marker at the region centre.
for region_index in range(29):
    region_name = str(list_REGION[region_index])
    region_layer = plugins.FeatureGroupSubGroup(regions_group, region_name)
    region_layer.add_to(base_map)
    folium.Marker(l1[region_index]).add_to(region_layer)

folium.LayerControl(collapsed=False).add_to(base_map)


base_map
Out[184]:
In [185]:
import os

# Export the map as an HTML file in the current working directory.
output_path = os.path.join('SPEED.html')
base_map.save(output_path)

The BUS_COUNT in the 29 regions for a whole week (Monday -> Sunday).

I chose the week of the 21st to the 27th of October 2019

In [186]:
# Week shown in the BUS_COUNT animation: 21 to 27 October 2019 (both included).
in_week = (data['YEAR'] == 2019) & (data['MONTH'] == 10) & data['DAY'].between(21, 27)
sub_set = data[in_week].copy()

# One frame per timestamp label, in order of first appearance; every frame is a
# list of [CENTER_LAT, CENTER_LON, BUS_COUNT] points.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'BUS_COUNT']].values.tolist()
    for date in sub_set.Time.unique()
]
    
In [187]:
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per timestamp label, labels in order of first appearance.
frame_labels = sub_set.Time.unique().tolist()
heat_gradient = {0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient=heat_gradient,
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group for the per-region marker layers.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
regions_group.add_to(base_map)

# One sub-layer per region, holding a marker at the region centre.
for region_index in range(29):
    region_name = str(list_REGION[region_index])
    region_layer = plugins.FeatureGroupSubGroup(regions_group, region_name)
    region_layer.add_to(base_map)
    folium.Marker(l1[region_index]).add_to(region_layer)

folium.LayerControl(collapsed=False).add_to(base_map)


base_map
Out[187]:
In [188]:
import os

# Export the map as an HTML file in the current working directory.
output_path = os.path.join('BUS_COUNT.html')
base_map.save(output_path)

In this part I am only going to focus on Sunday traffic in 2018 and 2019

In [189]:
# Rows from 2018 and 2019 whose DAY_OF_WEEK is 1.
sub_set = data[data['YEAR'].isin([2018, 2019]) & (data['DAY_OF_WEEK'] == 1)].copy()

# 'Time' holds formatted label strings, so sorting it orders the frames
# alphabetically ("Sun, 01 Apr ..." before "Sun, 01 Jan ..."), not chronologically.
# Take the chronological order from the numeric date parts and store it as the
# category order, so that `sub_set.Time.sort_values()` is chronological as well.
frame_labels = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR'])['Time'].unique()
sub_set['Time'] = pd.Categorical(sub_set['Time'], categories=frame_labels, ordered=True)

# One frame per timestamp label, oldest first; every frame is a list of
# [CENTER_LAT, CENTER_LON, speed] points.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in frame_labels
]
In [190]:
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per timestamp label, labels taken in sorted order.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_gradient = {0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient=heat_gradient,
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group for the per-region marker layers.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
regions_group.add_to(base_map)

# One sub-layer per region, holding a marker at the region centre.
for region_index in range(29):
    region_name = str(list_REGION[region_index])
    region_layer = plugins.FeatureGroupSubGroup(regions_group, region_name)
    region_layer.add_to(base_map)
    folium.Marker(l1[region_index]).add_to(region_layer)

folium.LayerControl(collapsed=False).add_to(base_map)


base_map
Out[190]:
In [191]:
import os

# Export the map as an HTML file in the current working directory.
output_path = os.path.join('SPEED_SUNDAYS.html')
base_map.save(output_path)

In this part I am only going to focus on Monday traffic in 2018 and 2019

In [192]:
# Rows from 2018 and 2019 whose DAY_OF_WEEK is 2.
sub_set = data[data['YEAR'].isin([2018, 2019]) & (data['DAY_OF_WEEK'] == 2)].copy()

# 'Time' holds formatted label strings, so sorting it orders the frames
# alphabetically ("Mon, 01 Apr ..." before "Mon, 01 Jan ..."), not chronologically.
# Take the chronological order from the numeric date parts and store it as the
# category order, so that `sub_set.Time.sort_values()` is chronological as well.
frame_labels = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR'])['Time'].unique()
sub_set['Time'] = pd.Categorical(sub_set['Time'], categories=frame_labels, ordered=True)

# One frame per timestamp label, oldest first; every frame is a list of
# [CENTER_LAT, CENTER_LON, speed] points.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in frame_labels
]
In [193]:
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per timestamp label, labels taken in sorted order.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_gradient = {0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient=heat_gradient,
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group for the per-region marker layers.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
regions_group.add_to(base_map)

# One sub-layer per region, holding a marker at the region centre.
for region_index in range(29):
    region_name = str(list_REGION[region_index])
    region_layer = plugins.FeatureGroupSubGroup(regions_group, region_name)
    region_layer.add_to(base_map)
    folium.Marker(l1[region_index]).add_to(region_layer)

folium.LayerControl(collapsed=False).add_to(base_map)


base_map
Out[193]:
In [194]:
import os

# Export the map as an HTML file in the current working directory.
output_path = os.path.join('SPEED_MONDAYS.html')
base_map.save(output_path)

The traffic on Christmas Eve (i.e. the 24th of December 2019) starting from 5 PM, and on Christmas Day (the 25th of December)

In [195]:
# 24 December 2019 from 17:00 onwards, plus all of 25 December 2019.
december_2019 = data[(data['YEAR'] == 2019) & (data['MONTH'] == 12)]
eve_evening = (december_2019['DAY'] == 24) & (december_2019['HOUR'] >= 17)
christmas_day = december_2019['DAY'] == 25
sub_set = december_2019[eve_evening | christmas_day].copy()

# 'Time' holds formatted label strings, so sorting it is alphabetical: within a
# day " 7:00 AM" would be followed by " 7:00 PM" instead of " 8:00 AM".
# Take the chronological order from the numeric date parts and store it as the
# category order, so that `sub_set.Time.sort_values()` is chronological as well.
frame_labels = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR'])['Time'].unique()
sub_set['Time'] = pd.Categorical(sub_set['Time'], categories=frame_labels, ordered=True)

# One frame per timestamp label, oldest first; every frame is a list of
# [CENTER_LAT, CENTER_LON, speed] points.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in frame_labels
]
    
In [196]:
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per timestamp label, labels taken in sorted order.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_gradient = {0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient=heat_gradient,
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group for the per-region marker layers.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
regions_group.add_to(base_map)

# One sub-layer per region, holding a marker at the region centre.
for region_index in range(29):
    region_name = str(list_REGION[region_index])
    region_layer = plugins.FeatureGroupSubGroup(regions_group, region_name)
    region_layer.add_to(base_map)
    folium.Marker(l1[region_index]).add_to(region_layer)

folium.LayerControl(collapsed=False).add_to(base_map)


base_map
Out[196]:
In [197]:
import os

# Export the map as an HTML file in the current working directory.
output_path = os.path.join('christmas.html')
base_map.save(output_path)

The traffic at rush hours (8 AM and 5 PM) on Mondays (2018 & 2019)

In [198]:
# Rows from 2018 and 2019 whose DAY_OF_WEEK is 2, restricted to the 08:00 and 17:00 hours.
sub_set = data[
    data['YEAR'].isin([2018, 2019])
    & (data['DAY_OF_WEEK'] == 2)
    & data['HOUR'].isin([8, 17])
].copy()

# 'Time' holds formatted label strings, so sorting it orders the frames
# alphabetically ("Mon, 01 Apr ..." before "Mon, 01 Jan ..."), not chronologically.
# Take the chronological order from the numeric date parts and store it as the
# category order, so that `sub_set.Time.sort_values()` is chronological as well.
frame_labels = sub_set.sort_values(['YEAR', 'MONTH', 'DAY', 'HOUR'])['Time'].unique()
sub_set['Time'] = pd.Categorical(sub_set['Time'], categories=frame_labels, ordered=True)

# One frame per timestamp label, oldest first; every frame is a list of
# [CENTER_LAT, CENTER_LON, speed] points.
l = [
    sub_set.loc[sub_set['Time'] == date, ['CENTER_LAT', 'CENTER_LON', 'speed']].values.tolist()
    for date in frame_labels
]
In [199]:
base_map = generateBaseMap([41.881832, -87.623177])

# Animated heat layer: one frame per timestamp label, labels taken in sorted order.
frame_labels = sub_set.Time.sort_values().unique().tolist()
heat_gradient = {0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}
heat_layer = HeatMapWithTime(
    l,
    index=frame_labels,
    radius=40,
    gradient=heat_gradient,
    min_opacity=0.5,
    max_opacity=0.8,
    use_local_extrema=True,
)
heat_layer.add_to(base_map)

# Parent group for the per-region marker layers.
regions_group = folium.FeatureGroup(name='The 29 regions of Chicago')
regions_group.add_to(base_map)

# One sub-layer per region, holding a marker at the region centre.
for region_index in range(29):
    region_name = str(list_REGION[region_index])
    region_layer = plugins.FeatureGroupSubGroup(regions_group, region_name)
    region_layer.add_to(base_map)
    folium.Marker(l1[region_index]).add_to(region_layer)

folium.LayerControl(collapsed=False).add_to(base_map)


base_map
Out[199]:
In [200]:
import os

# Export the map as an HTML file in the current working directory.
output_path = os.path.join('MONDAYS_RUSH_HOURS.html')
base_map.save(output_path)
In [ ]: